import ma.const

import random
import math
import os
import sys

# Akamai DNS occurence probability
if len(sys.argv) != 4:
    print('Usage: python akamaigen.py [mean] [std_dev] [starting file no]')
    mean = 20
    std_dev = 17
    starting_file_no = 0
    print('Taking %d as mean and %d as std-dev' % (mean, std_dev))
else:
    mean = int(sys.argv[1])
    std_dev = int(sys.argv[2])
    starting_file_no = int(sys.argv[3])

# Identifier of the job whose input files are generated; it selects the
# dataset sub-directory.
job_id = 14
dest_dir = "/state/partition1/datasets/" + str(job_id)

# Ensure the destination directory exists. exist_ok=True makes this a single
# call, so a directory created between a separate existence check and the
# creation call can no longer raise.
os.makedirs(dest_dir, exist_ok=True)

# Number of DNS entries written into each generated file.
dns_entries_per_site = 1700000
files_per_map = 1
# Alternative setting kept for reference: maps = 6 * 39 * 4
maps = 62
# One generated file per site.
sites = files_per_map * maps
akamai_link = "e526.d.akamaiedge.net"
other_link = "junk.microsoft.com"
sep = "\n"

# Estimated size of one generated file. Every entry is written as
# link + sep, so the separator is counted as part of each entry.
# NOTE(review): the 20% / 80% split is fixed here and does not follow the
# mean given on the command line.
akamai_len = len(akamai_link + sep) * (dns_entries_per_site * 0.2)
other_len = len(other_link + sep) * (dns_entries_per_site * 0.8)
file_len = akamai_len + other_len
print("File length for DNS entries:", file_len)
print("Total data-input size:", file_len * files_per_map * maps)

def listMeanStdDev(target_mean=None, target_std_dev=None, count=None):
    """Build a list of integers approximating a given mean and std-dev.

    A ladder of standard deviations is built that falls linearly from
    ``target_std_dev`` down to 0 over ``count`` steps.  For each step the
    squared distance of the next value from the mean is taken as the
    difference between ``remaining * sd_now ** 2`` and
    ``(remaining - 1) * sd_next ** 2``.  Values are placed alternately below
    and above the mean, and the last value is the mean itself.

    Args:
        target_mean: Desired mean; defaults to the module-level ``mean``.
        target_std_dev: Desired standard deviation; defaults to the
            module-level ``std_dev``.
        count: Number of values to produce; defaults to the module-level
            ``sites``.

    Returns:
        A list of ``count`` numbers (rounded ints, ending with
        ``target_mean``).  An empty list is returned when ``count`` is
        below 1.
    """
    if target_mean is None:
        target_mean = mean
    if target_std_dev is None:
        target_std_dev = std_dev
    if count is None:
        count = sites

    # Degenerate sizes: nothing to spread, and count - 1 would be a zero
    # (or negative) divisor below.
    if count < 1:
        return []
    if count == 1:
        return [target_mean]

    # Ladder of std-devs, built by accumulation and then reversed so it runs
    # from target_std_dev down to 0.
    step = float(target_std_dev) / (count - 1)
    std_devs = [0.0]
    for _ in range(count - 1):
        std_devs.append(std_devs[-1] + step)
    std_devs.reverse()

    # This loop creates a list of numbers whose mean and std dev approach
    # those that are provided as arguments.
    list_nums = []
    sign = -1
    for i in range(count - 1):
        remaining = count - i
        curr_sq_sum = remaining * (std_devs[i] ** 2)
        next_sq_sum = (remaining - 1) * (std_devs[i + 1] ** 2)
        offset = (curr_sq_sum - next_sq_sum) ** 0.5
        list_nums.append(int(round((offset * sign) + target_mean)))
        # Alternate sides of the mean so the offsets roughly cancel out.
        sign = -sign

    list_nums.append(target_mean)
    return list_nums


def computeMeanStdDev(numbers=None):
    """Print and return the mean and population std-dev of a number list.

    Args:
        numbers: Values to analyse; defaults to the module-level
            ``list_nums``.

    Returns:
        A ``(mean, std_dev)`` tuple of floats.  The same two values are also
        printed, preceded by a line giving the number of values.

    Raises:
        ZeroDivisionError: If the list is empty.
    """
    if numbers is None:
        numbers = list_nums

    print("computing std dev", len(numbers))
    m = float(sum(numbers)) / len(numbers)
    print(m)

    # Population variance: mean of the squared deviations from the mean.
    variance = float(sum((value - m) ** 2 for value in numbers)) / len(numbers)
    deviation = variance ** 0.5
    print(deviation)
    return m, deviation


print("Producing numbers following mean:", mean, "std-dev:", std_dev)
list_nums = listMeanStdDev()
print("Shuffling")
random.shuffle(list_nums)

print("Generating %d files" % sites)
# Each entry is one link followed by the separator; build the lines once.
akamai_line = akamai_link + sep
other_line = other_link + sep
for file_index in range(sites):
    # File name is supplied by ma.const for this job and file number.
    filename = ma.const.JobsXmlData.get_str_data(
        ma.const.xml_map_input_filename, job_id, starting_file_no + file_index)
    filepath = dest_dir + os.sep + filename
    print(filepath)

    # list_nums holds, per file, the percentage of entries that are Akamai.
    # NOTE(review): percentages are not clamped to 0..100; a negative value
    # yields no Akamai lines and more than dns_entries_per_site other lines.
    akamai_percent = list_nums[file_index]
    ak = int(dns_entries_per_site * (float(akamai_percent) / 100))
    oth = int(dns_entries_per_site * (float(100 - akamai_percent) / 100))

    # The with-block closes the file even if a write fails. Repeating the
    # line builds each section in one go instead of one write per entry;
    # a non-positive count repeats to an empty string, i.e. writes nothing.
    with open(filepath, 'w') as fd:
        fd.write(akamai_line * ak)
        fd.write(other_line * oth)
    